Topic Scores

This analysis examines Arctic Council speeches and national Arctic strategy documents by comparing the relative frequencies of words belonging to twelve different topics. We have defined these topics by creating dictionaries of commonly mentioned words that relate to each.

The method used here calculates the fraction of words in each document belonging to each topic to calculate a document-level score. It then averages document-level scores for each country to create a country-level score.

Preparing the data

This section loads the libraries and the text files from four different folders. It also contains some excess code. Moving forward, we use the data frames produced by the readtext function, not the corpora, DTMs, or DFMs.

This version does not remove stop words, punctuation, etc. This should not adversely affect our results, since we use a dictionary-based method to calculate our scores.

library(igraph)
library(tidyverse)
library(tidytext)
library(readtext)
library(quanteda)
library(dplyr)
library(stringr)
library(plotly)
library(rworldmap)
library(ngram)

# Root folder holding the four document collections. Set the KROGH_ARCTIC_DIR
# environment variable to run the analysis from another machine or location;
# when it is unset, the original hard-coded location is used.
BASE_DIR <- Sys.getenv(
  "KROGH_ARCTIC_DIR",
  unset = "C:/Users/laura/OneDrive/Desktop/Krogh-Arctic"
)

# Each collection is read with readtext() and then uses its doc_id column as
# row names. DATA_DIR is reassigned before every read, so after this section
# it points at the last folder.

# Strategy documents
# Note that these doc names are not as nice as for the UN Corpus as they vary
# a bit.
DATA_DIR <- file.path(BASE_DIR, "Strategy Documents (new)")
strategy <- readtext(file.path(DATA_DIR, "*"))
row.names(strategy) <- strategy$doc_id

# Observer documents
DATA_DIR <- file.path(BASE_DIR, "Observer Documents")
observer <- readtext(file.path(DATA_DIR, "*"))
row.names(observer) <- observer$doc_id

# Arctic speeches
DATA_DIR <- file.path(BASE_DIR, "Arctic Speeches")
speeches <- readtext(file.path(DATA_DIR, "*"))
row.names(speeches) <- speeches$doc_id

# Official speeches and documents
DATA_DIR <- file.path(BASE_DIR, "Official Speeches and Documents")
officialspeeches <- readtext(file.path(DATA_DIR, "*"))
row.names(officialspeeches) <- officialspeeches$doc_id

Combining the data frames and restructuring

# Unnest the tokens (words): each collection becomes a data frame with one
# word per row in column `word`, alongside the originating `doc_id`.
strategydf <- unnest_tokens(strategy, word, text)
observerdf <- unnest_tokens(observer, word, text)
speechesdf <- unnest_tokens(speeches, word, text)
officialspeechesdf <- unnest_tokens(officialspeeches, word, text)

# Combine the four full-text tables (one row per document).
# bind_rows() stacks the tables directly. Chained full_join() calls on every
# shared column only act like stacking while no rows match; if a doc_id ever
# occurred in two folders the token-level join would multiply rows.
# NOTE(review): this assumes doc_id values are unique across the folders.
fulltexts <- bind_rows(strategy, observer, speeches, officialspeeches)

# Combine the four token-level data frames.
totaldf <- bind_rows(strategydf, observerdf, speechesdf, officialspeechesdf)

# Calculate the document lengths (number of tokens per doc_id).
words <- totaldf %>%
  group_by(doc_id) %>%
  mutate(length = n()) %>%
  ungroup()

# Separate the doc_id into country and everything that follows. Only the
# first two "_"-delimited pieces are kept; extra = "drop" makes that explicit
# and silences the "Additional pieces discarded" warning.
words <- words %>%
  mutate(doc_id2 = doc_id) %>%
  separate(doc_id2, c("country", "misc"), sep = "_", extra = "drop")

# The same for the full texts.
fulltexts <- fulltexts %>%
  mutate(doc_id2 = doc_id) %>%
  separate(doc_id2, c("country", "misc"), sep = "_", extra = "drop")

Defining the topic dictionaries

There are twelve topics: 1. environment 2. indigenous 3. transport 4. development 5. tourism 6. resources 7. fisheries 8. diplomacy 9. security 10. russia 11. china 12. legal

# Topic dictionaries.
#
# Every dictionary is a data frame with two character columns:
#   topic  the dictionary terms
#   name   the topic label, repeated on every row
# stringsAsFactors = FALSE keeps both columns character on every R version,
# so joins against the character `word` column need no factor coercion.
#
# NOTE(review): countwords() matches terms against single tokens exactly, and
# unnest_tokens() lowercases tokens by default. Multi-word phrases (e.g.
# "climate change"), capitalised terms ("Sustainable", "UNCLOS", "EEZ", "ICJ")
# and stems ("scienti", "industr") can therefore only be matched by the
# full-text method in countwords2() -- confirm this is intended.
# NOTE(review): `environment` shadows the name of base::environment().

environment <- data.frame(
  topic = c(
    "research", "science", "scienti", "environment", "climate",
    "climate change", "ocean", "sea", "sea level", "atmosphere", "air", "ice",
    "warm", "melt", "knowledge", "station", "glaciological", "geological",
    "biological", "ecosystem", "paleoclimate", "laboratory", "conservation",
    "preservation", "temperature", "data", "measurement", "study",
    "precipitation", "pollution", "cryospheric", "publication",
    "biodiversity", "academic", "glacier", "disaster", "observe", "trend",
    "predict", "species", "force", "global warming", "protect"
  ),
  name = "environment",
  stringsAsFactors = FALSE
)

indigenous <- data.frame(
  topic = c(
    "nation", "local", "indigenous", "peoples", "community", "human",
    "social", "lives", "condition", "inhabitants", "well-being", "language",
    "health", "traditional", "culture", "rural", "residents"
  ),
  name = "indigenous",
  stringsAsFactors = FALSE
)

transport <- data.frame(
  topic = c(
    "transportation", "shipping", "import", "export", "maritime",
    "transport", "ship", "vessel", "navigation", "route", "channel",
    "northeast passage", "northwest passage", "northern sea route", "voyage",
    "commercial", "trade", "icebreakers", "water", "transit"
  ),
  name = "transport",
  stringsAsFactors = FALSE
)

development <- data.frame(
  topic = c(
    "Sustainable", "development", "economic", "globalization",
    "economic zones", "commercial", "production", "strategy", "benefit",
    "capital", "market", "enterprise", "opportunity", "business",
    "infrastructure", "fund", "industry"
  ),
  name = "development",
  stringsAsFactors = FALSE
)

tourism <- data.frame(
  topic = c("tourism", "tourists", "rescue", "ecotourism"),
  name = "tourism",
  stringsAsFactors = FALSE
)

resources <- data.frame(
  topic = c(
    "oil", "industr", "resource", "technology", "energy", "gas", "carbon",
    "infrastructure", "build", "exploit", "mine", "utilization",
    "exploitation", "natural", "mineral", "geothermal", "wind", "exploration",
    "consumer", "pipeline", "extraction", "seabed", "metal", "metals",
    "seafloor", "offshore"
  ),
  name = "resources",
  stringsAsFactors = FALSE
)

fisheries <- data.frame(
  topic = c("fish", "fisheries", "fishing", "aquaculture", "goods"),
  name = "fisheries",
  stringsAsFactors = FALSE
)

diplomacy <- data.frame(
  topic = c(
    "strengthen", "joint", "relationship", "peace", "integration",
    "cooperation", "international", "relations", "diplomatic", "contribute",
    "parties", "stability", "equality", "participants", "connect",
    "multilateral", "bilateral", "regional", "global", "coalition",
    "collaboration", "coordination", "share", "same", "affairs", "harmony",
    "alliance", "partnership", "freedom", "political", "meet"
  ),
  name = "diplomacy",
  stringsAsFactors = FALSE
)

security <- data.frame(
  topic = c(
    "sovereignty", "state", "nation", "secure", "security", "stakeholder",
    "governance", "claim", "interests", "territory", "zone", "own",
    "influence", "military", "defend", "defense", "position", "independent"
  ),
  name = "security",
  stringsAsFactors = FALSE
)

russia <- data.frame(
  topic = c("russia"),
  name = "russia",
  stringsAsFactors = FALSE
)

china <- data.frame(
  topic = c("china"),
  name = "china",
  stringsAsFactors = FALSE
)

legal <- data.frame(
  topic = c(
    "continental shelf", "rule", "UNCLOS", "jurisdiction", "rights",
    "spitsbergen", "legal", "law", "just", "treaty", "treaties", "regulation",
    "boundary", "boundaries", "delimitation", "EEZ", "arbitration", "court",
    "ICJ", "ruling", "protocol", "reservation", "convention",
    "contiguous zone", "right", "ratification", "ratified"
  ),
  name = "legal",
  stringsAsFactors = FALSE
)

Generating scores

# Token-based topic scores.
#
# Document score: the fraction of a document's tokens that exactly match a
# term of the topic dictionary. Country score: the plain mean of the scores
# of that country's documents.
#
# Each document contributes exactly one score to its country's mean; the
# country mean is not weighted by how many matching tokens a document has.
# Documents without any matching token score 0 and are still part of the
# mean, so every country in `data` appears in the result.
#
# Arguments:
#   topic  data frame with columns `topic` (dictionary terms) and `name`
#          (topic label; only the first value is used).
#   data   data frame with one row per token and columns `word`, `doc_id`
#          and `country`. It should hold every token of each document, since
#          the document length is taken as the number of rows per doc_id.
#          Defaults to the global `words`.
#
# Returns a data frame with one row per country and the columns `country`,
# `country_score` and `name`.
countwords <- function(topic, data = words) {
  terms <- as.character(topic$topic)

  # One row per document: the share of its tokens found in the dictionary.
  docscores <- data %>%
    mutate(hit = word %in% terms) %>%
    group_by(doc_id) %>%
    summarize(country = first(country), score = mean(hit))

  # Country scores: unweighted mean of the document scores.
  countryscores <- docscores %>%
    group_by(country) %>%
    summarize(country_score = mean(score)) %>%
    mutate(name = as.character(topic$name[1]))

  countryscores
}

Graphing scores

# Dot plot of country-level scores for one topic, countries ordered by score.
#   scores  data frame with columns `country` and `country_score`
#   title   plot title
plot_topic_scores <- function(scores, title) {
  ggplot(scores, aes(x = reorder(country, country_score), y = country_score)) +
    geom_point() +
    coord_flip() +
    ggtitle(title)
}

# For every topic: calculate the scores with countwords(), build the plot and
# render it interactively with ggplotly(). The *_scores and p.* objects are
# kept under their own names because later sections reuse them.

# Environment
environment_scores <- countwords(environment)
p.environment <- plot_topic_scores(environment_scores, "Environment")
ggplotly(p.environment)

# Indigenous
indigenous_scores <- countwords(indigenous)
p.indigenous <- plot_topic_scores(indigenous_scores, "Indigenous")
ggplotly(p.indigenous)

# Transport
transport_scores <- countwords(transport)
p.transport <- plot_topic_scores(transport_scores, "Transport")
ggplotly(p.transport)

# Development
development_scores <- countwords(development)
p.development <- plot_topic_scores(development_scores, "Development")
ggplotly(p.development)

# Tourism
tourism_scores <- countwords(tourism)
p.tourism <- plot_topic_scores(tourism_scores, "Tourism")
ggplotly(p.tourism)

# Resources
resources_scores <- countwords(resources)
p.resources <- plot_topic_scores(resources_scores, "Resources")
ggplotly(p.resources)

# Fisheries
fisheries_scores <- countwords(fisheries)
p.fisheries <- plot_topic_scores(fisheries_scores, "Fisheries")
ggplotly(p.fisheries)

# Diplomacy
diplomacy_scores <- countwords(diplomacy)
p.diplomacy <- plot_topic_scores(diplomacy_scores, "Diplomacy")
ggplotly(p.diplomacy)

# Security
security_scores <- countwords(security)
p.security <- plot_topic_scores(security_scores, "Security")
ggplotly(p.security)

# Russia
russia_scores <- countwords(russia)
p.russia <- plot_topic_scores(russia_scores, "Russia")
ggplotly(p.russia)

# China
china_scores <- countwords(china)
p.china <- plot_topic_scores(china_scores, "China")
ggplotly(p.china)

# Legal
legal_scores <- countwords(legal)
p.legal <- plot_topic_scores(legal_scores, "Legal")
ggplotly(p.legal)

##New Score Method

# Full-text topic scores ("new score method").
#
# Document score: the number of matches of the dictionary terms in the raw
# document text, divided by the document's word count from wordcount().
# Country score: the mean of the scores of that country's documents.
#
# Only the `topic` column is used as search patterns. Iterating over whole
# rows of `topic` would also pass the `name` column as a pattern, adding the
# matches of the topic label once for every dictionary row.
#
# Terms are applied as regular expressions without word boundaries, so a term
# also matches inside longer words, and overlapping terms (e.g. "climate" and
# "climate change") are each counted.
#
# Arguments:
#   topic        data frame with columns `topic` (dictionary terms) and
#                `name` (topic label; only the first value is used).
#   ignore_case  match terms case-insensitively? The default FALSE keeps the
#                case-sensitive matching; with it, lower-case terms do not
#                match capitalised words in the raw text.
#
# Reads the global `fulltexts` (columns `text` and `country`).
# Returns a data frame with the columns `country`, `country_score`, `name`.
countwords2 <- function(topic, ignore_case = FALSE) {
  patterns <- regex(as.character(topic$topic), ignore_case = ignore_case)

  docscores <- fulltexts
  # vapply() fixes the result type; USE.NAMES = FALSE keeps the full document
  # texts from being attached as element names.
  docscores$count <- vapply(
    fulltexts$text,
    function(doc_text) sum(str_count(doc_text, patterns)),
    numeric(1),
    USE.NAMES = FALSE
  )
  docscores$doclength <- vapply(
    fulltexts$text,
    function(doc_text) wordcount(doc_text),
    numeric(1),
    USE.NAMES = FALSE
  )

  docscores <- docscores %>%
    mutate(score = count / doclength)

  # Country scores: mean of the document scores.
  countryscores <- docscores %>%
    group_by(country) %>%
    summarize(country_score = mean(score)) %>%
    mutate(name = as.character(topic$name[1]))

  countryscores
}

#countgraph <- function(topic){
#counts1 <- countwords(topic)  
#counts2 <- countwords2(topic)
#name <- paste0(topic$name[1],"_scoresall")
#countsall <- full_join(counts1, counts2, by="country")
#assign(as.character(name), countsall)
#return(get(name))
#p.2 <-ggplot(countsall) + geom_point(aes(x=reorder(country,country_score.y), y=country_score.y)) +  coord_flip() +
 #  ggtitle(topic)
#print(p.2)
#}
#countgraph(transport)

# Dot plot of the full-text ("new method") country scores for one topic.
#   all_scores  token-based and full-text scores joined by country; the
#               full-text score is the column `country_score.y`
#   title       plot title
plot_topic_scores2 <- function(all_scores, title) {
  ggplot(all_scores) +
    geom_point(
      aes(x = reorder(country, country_score.y), y = country_score.y)
    ) +
    coord_flip() +
    ggtitle(title)
}

# For every topic: calculate the full-text scores, join them to the
# token-based scores by country, plot the full-text scores and render the
# plot interactively. Each topic writes to its own *_all and p.*2 objects and
# renders its own plot.

# Environment
environment_scores2 <- countwords2(environment)
environment_all <- full_join(
  environment_scores, environment_scores2, by = "country"
)
p.environment2 <- plot_topic_scores2(environment_all, "Environment")
ggplotly(p.environment2)

# Indigenous
indigenous_scores2 <- countwords2(indigenous)
indigenous_all <- full_join(
  indigenous_scores, indigenous_scores2, by = "country"
)
p.indigenous2 <- plot_topic_scores2(indigenous_all, "Indigenous")
ggplotly(p.indigenous2)

# Transport
transport_scores2 <- countwords2(transport)
transport_all <- full_join(
  transport_scores, transport_scores2, by = "country"
)
p.transport2 <- plot_topic_scores2(transport_all, "Transport")
ggplotly(p.transport2)

# Development
development_scores2 <- countwords2(development)
development_all <- full_join(
  development_scores, development_scores2, by = "country"
)
p.development2 <- plot_topic_scores2(development_all, "Development")
ggplotly(p.development2)

# Tourism
tourism_scores2 <- countwords2(tourism)
tourism_all <- full_join(tourism_scores, tourism_scores2, by = "country")
p.tourism2 <- plot_topic_scores2(tourism_all, "Tourism")
ggplotly(p.tourism2)

# Resources
resources_scores2 <- countwords2(resources)
resources_all <- full_join(
  resources_scores, resources_scores2, by = "country"
)
p.resources2 <- plot_topic_scores2(resources_all, "Resources")
ggplotly(p.resources2)

# Fisheries
fisheries_scores2 <- countwords2(fisheries)
fisheries_all <- full_join(
  fisheries_scores, fisheries_scores2, by = "country"
)
p.fisheries2 <- plot_topic_scores2(fisheries_all, "Fisheries")
ggplotly(p.fisheries2)

# Diplomacy
diplomacy_scores2 <- countwords2(diplomacy)
diplomacy_all <- full_join(
  diplomacy_scores, diplomacy_scores2, by = "country"
)
p.diplomacy2 <- plot_topic_scores2(diplomacy_all, "Diplomacy")
ggplotly(p.diplomacy2)

# Security
security_scores2 <- countwords2(security)
security_all <- full_join(security_scores, security_scores2, by = "country")
p.security2 <- plot_topic_scores2(security_all, "Security")
ggplotly(p.security2)

# Legal
legal_scores2 <- countwords2(legal)
legal_all <- full_join(legal_scores, legal_scores2, by = "country")
p.legal2 <- plot_topic_scores2(legal_all, "Legal")
ggplotly(p.legal2)

Mapping

# Draw a world map coloured by country score.
#   topic  data frame with the columns `country` and `country_score`; the
#          expression passed for this argument is deparsed and used as the
#          map title.
mapscores <- function(topic) {
  # Capture the caller's expression for `topic` to use as the title.
  map_title <- deparse(substitute(topic))

  # Attach the scores to the map's country polygons by country name.
  joined_map <- joinCountryData2Map(
    topic,
    joinCode = "NAME",
    nameJoinColumn = "country"
  )

  mapCountryData(
    joined_map,
    nameColumnToPlot = "country_score",
    colourPalette = "heat",
    addLegend = TRUE,
    borderCol = "grey",
    mapTitle = map_title
  )
}

# Map the token-based country scores, one map per topic.
# mapscores() builds the map title from the expression it is called with
# (deparse(substitute(topic))), so each call passes the score table by its
# variable name rather than through a loop variable.
# The `##` lines are the console output echoed from the run that produced
# this document.
mapscores(environment_scores)
## 22 codes from your data successfully matched countries in the map
## 0 codes from your data failed to match with a country code in the map
## 221 codes from the map weren't represented in your data

mapscores(indigenous_scores)
## 22 codes from your data successfully matched countries in the map
## 0 codes from your data failed to match with a country code in the map
## 221 codes from the map weren't represented in your data

mapscores(transport_scores)
## 21 codes from your data successfully matched countries in the map
## 0 codes from your data failed to match with a country code in the map
## 222 codes from the map weren't represented in your data

mapscores(development_scores)
## 22 codes from your data successfully matched countries in the map
## 0 codes from your data failed to match with a country code in the map
## 221 codes from the map weren't represented in your data

mapscores(tourism_scores)
## 18 codes from your data successfully matched countries in the map
## 0 codes from your data failed to match with a country code in the map
## 225 codes from the map weren't represented in your data

mapscores(resources_scores)
## 22 codes from your data successfully matched countries in the map
## 0 codes from your data failed to match with a country code in the map
## 221 codes from the map weren't represented in your data

mapscores(fisheries_scores)
## 18 codes from your data successfully matched countries in the map
## 0 codes from your data failed to match with a country code in the map
## 225 codes from the map weren't represented in your data

mapscores(diplomacy_scores)
## 22 codes from your data successfully matched countries in the map
## 0 codes from your data failed to match with a country code in the map
## 221 codes from the map weren't represented in your data

mapscores(security_scores)
## 21 codes from your data successfully matched countries in the map
## 0 codes from your data failed to match with a country code in the map
## 222 codes from the map weren't represented in your data

mapscores(russia_scores)
## 18 codes from your data successfully matched countries in the map
## 0 codes from your data failed to match with a country code in the map
## 225 codes from the map weren't represented in your data

mapscores(china_scores)
## 13 codes from your data successfully matched countries in the map
## 0 codes from your data failed to match with a country code in the map
## 230 codes from the map weren't represented in your data

mapscores(legal_scores)
## 21 codes from your data successfully matched countries in the map
## 0 codes from your data failed to match with a country code in the map
## 222 codes from the map weren't represented in your data

Sentiments

# Country-level sentiment score.
#
# Document score: the fraction of a document's tokens whose word carries the
# `target` sentiment in `lexicon`. Country score: the plain mean of the
# scores of that country's documents. Each document contributes one score
# (the mean is not weighted by the number of matching tokens), and documents
# without any matching token score 0.
#
#   lexicon  data frame with the columns `word` and `sentiment`
#   target   the sentiment value to score, e.g. "positive" or "fear"
#
# Reads the global `words` (one row per token; columns `word`, `doc_id`,
# `country`). Returns a data frame with `country` and `country_score`.
sentiment_country_scores <- function(lexicon, target) {
  target_words <- unique(lexicon$word[lexicon$sentiment == target])

  words %>%
    mutate(hit = word %in% target_words) %>%
    group_by(doc_id) %>%
    summarize(country = first(country), score = mean(hit)) %>%
    group_by(country) %>%
    summarize(country_score = mean(score))
}

# Positive sentiment (bing lexicon)
sentimentscores <- sentiment_country_scores(get_sentiments("bing"), "positive")

joinCountryData2Map(
  sentimentscores,
  joinCode = "NAME",
  nameJoinColumn = "country"
) %>%
  mapCountryData(
    nameColumnToPlot = "country_score",
    colourPalette = "heat",
    addLegend = TRUE,
    borderCol = "grey",
    mapTitle = "positive sentiment"
  )

# Fear sentiment (nrc lexicon)
fearsentimentscores <- sentiment_country_scores(get_sentiments("nrc"), "fear")

joinCountryData2Map(
  fearsentimentscores,
  joinCode = "NAME",
  nameJoinColumn = "country"
) %>%
  mapCountryData(
    nameColumnToPlot = "country_score",
    colourPalette = "heat",
    addLegend = TRUE,
    borderCol = "grey",
    mapTitle = "fear sentiment"
  )

Single Country Analysis

# Stack the token-based score tables of all twelve topics into one long
# table with the columns country, country_score and name (topic label).
# bind_rows() appends the tables as they are; joining on every column would
# only act like stacking as long as no two rows happen to be identical.
countrydf <- bind_rows(
  environment_scores,
  indigenous_scores,
  transport_scores,
  development_scores,
  tourism_scores,
  resources_scores,
  fisheries_scores,
  diplomacy_scores,
  security_scores,
  russia_scores,
  china_scores,
  legal_scores
)
# Bar chart of one country's token-based scores, one bar per topic.
#   country.name  value of `country` to select from the global `countrydf`;
#                 it is also passed to scale_x_discrete() as its first
#                 argument.
countrygraph <- function(country.name) {
  one_country <- countrydf %>%
    filter(country == country.name)

  bars <- ggplot(one_country, aes(x = name, y = country_score)) +
    geom_bar(stat = "identity")

  # Rotate the topic labels so they stay readable.
  bars +
    theme(axis.text.x = element_text(angle = 90, hjust = 1)) +
    scale_x_discrete(country.name)
}

# Token-based score profile for every country, one bar chart each.
# The calls are kept as separate top-level expressions so that each returned
# plot is printed on its own.
countrygraph("Canada")

countrygraph("China")

countrygraph("Denmark")

countrygraph("Finland")

countrygraph("France")

countrygraph("Germany")

countrygraph("Greenland")

countrygraph("Iceland")

countrygraph("India")

countrygraph("Italy")

countrygraph("Japan")

countrygraph("Korea")

countrygraph("Netherlands")

countrygraph("Norway")

countrygraph("Poland")

countrygraph("Russia")

countrygraph("Singapore")

countrygraph("Spain")

countrygraph("Sweden")

countrygraph("Switzerland")

countrygraph("UK")

countrygraph("US")

Single Country Analysis (New Score Method)

# Stack the full-text ("new method") score tables into one long table with
# the columns country, country_score and name (topic label). No full-text
# scores are calculated for the russia and china topics, so they are absent.
# bind_rows() appends the tables as they are; joining on every column would
# only act like stacking as long as no two rows happen to be identical.
country2df <- bind_rows(
  environment_scores2,
  indigenous_scores2,
  transport_scores2,
  development_scores2,
  tourism_scores2,
  resources_scores2,
  fisheries_scores2,
  diplomacy_scores2,
  security_scores2,
  legal_scores2
)
# Bar chart of one country's full-text ("new method") scores, one bar per
# topic.
#   country.name  value of `country` to select from the global `country2df`;
#                 with a "2" appended it is passed to scale_x_discrete() as
#                 its first argument.
countrygraph2 <- function(country.name) {
  one_country <- country2df %>%
    filter(country == country.name)

  bars <- ggplot(one_country, aes(x = name, y = country_score)) +
    geom_bar(stat = "identity")

  # Rotate the topic labels so they stay readable.
  bars +
    theme(axis.text.x = element_text(angle = 90, hjust = 1)) +
    scale_x_discrete(paste0(country.name, 2))
}

# Full-text ("new method") score profile for every country, one bar chart
# each. The calls are kept as separate top-level expressions so that each
# returned plot is printed on its own.
countrygraph2("Canada")

countrygraph2("China")

countrygraph2("Denmark")

countrygraph2("Finland")

countrygraph2("France")

countrygraph2("Germany")

countrygraph2("Greenland")

countrygraph2("Iceland")

countrygraph2("India")

countrygraph2("Italy")

countrygraph2("Japan")

countrygraph2("Korea")

countrygraph2("Netherlands")

countrygraph2("Norway")

countrygraph2("Poland")

countrygraph2("Russia")

countrygraph2("Singapore")

countrygraph2("Spain")

countrygraph2("Sweden")

countrygraph2("Switzerland")

countrygraph2("UK")

countrygraph2("US")

# Side-by-side comparison of the US, Russia and China on the full-text
# scores, leaving out the tourism, environment and fisheries topics.
countrymutli <- country2df %>%
  filter(country %in% c("US", "Russia", "China")) %>%
  filter(name != "tourism", name != "environment", name != "fisheries")

# Grouped bars: one cluster per topic, one bar per country.
p.countrymulti <- ggplot(
  countrymutli,
  aes(x = name, y = country_score, fill = country)
) +
  geom_bar(stat = "identity", position = "dodge") +
  theme(axis.text.x = element_text(angle = 90, hjust = 1))

# Make interactive.
ggplotly(p.countrymulti)

Creating the categories

# Standardise the token-based country scores over all countries and topics.
scoremean <- mean(countrydf$country_score, na.rm = TRUE)
scoresd <- sd(countrydf$country_score, na.rm = TRUE)

countrydf <- countrydf %>%
  mutate(sdmean = (country_score - scoremean) / scoresd)

# Quartiles of the standardised scores; elements 2 to 4 are the cut points.
scoresq <- quantile(
  countrydf$sdmean,
  probs = c(0, 0.25, 0.5, 0.75, 1),
  na.rm = TRUE
)

# Rank every score by the interval it falls into:
#   below the 1st cut point -> "very low"
#   [1st, 2nd)              -> "low"
#   [2nd, 3rd)              -> "medium"
#   3rd cut point and above -> "high"
# findInterval() uses left-closed intervals, so a score that equals a cut
# point goes into the interval starting there. Chained strict < and >
# comparisons would match no branch for such a score and label it "high".
countrydf <- countrydf %>%
  mutate(
    rank = c("very low", "low", "medium", "high")[
      findInterval(sdmean, scoresq[2:4]) + 1
    ]
  )

# One row per country, one column per topic. Only country, name and rank are
# kept before spreading: any remaining score column would act as an extra key
# and scatter each country's ranks over many rows that are mostly NA.
cleantable <- countrydf %>%
  select(country, name, rank) %>%
  spread(name, rank) %>%
  arrange(country)

cleantable

write.csv(cleantable, file = "cleantable.csv")

Creating the categories 2

# Standardise the full-text country scores over all countries and topics.
score2mean <- mean(country2df$country_score, na.rm = TRUE)
score2sd <- sd(country2df$country_score, na.rm = TRUE)

country2df <- country2df %>%
  mutate(sdmean = (country_score - score2mean) / score2sd)

# Quartiles of the standardised scores; elements 2 to 4 are the cut points.
scores2q <- quantile(
  country2df$sdmean,
  probs = c(0, 0.25, 0.5, 0.75, 1),
  na.rm = TRUE
)

# Rank every score by the interval it falls into:
#   below the 1st cut point -> "very low"
#   [1st, 2nd)              -> "low"
#   [2nd, 3rd)              -> "medium"
#   3rd cut point and above -> "high"
# findInterval() uses left-closed intervals, so a score that equals a cut
# point goes into the interval starting there. Chained strict < and >
# comparisons would match no branch for such a score and label it "high".
country2df <- country2df %>%
  mutate(
    rank = c("very low", "low", "medium", "high")[
      findInterval(sdmean, scores2q[2:4]) + 1
    ]
  )

# One row per country, one column per topic. Only country, name and rank are
# kept before spreading: any remaining score column would act as an extra key
# and scatter each country's ranks over many rows that are mostly NA.
cleantable2 <- country2df %>%
  select(country, name, rank) %>%
  spread(name, rank) %>%
  arrange(country)

cleantable2

write.csv(cleantable2, file = "cleantable2.csv")

Creating the categories 3

# Alternative cut points for the standardised full-text scores: the 20th,
# 40th and 60th percentiles (elements 2 to 4) instead of the quartiles.
scores3q <- quantile(
  country2df$sdmean,
  probs = c(0, 0.20, 0.40, 0.60, 1),
  na.rm = TRUE
)

# Rank every score by the interval it falls into:
#   below the 1st cut point -> "very low"
#   [1st, 2nd)              -> "low"
#   [2nd, 3rd)              -> "medium"
#   3rd cut point and above -> "high"
# findInterval() uses left-closed intervals, so a score that equals a cut
# point goes into the interval starting there. Chained strict < and >
# comparisons would match no branch for such a score and label it "high".
country3df <- country2df %>%
  mutate(
    rank = c("very low", "low", "medium", "high")[
      findInterval(sdmean, scores3q[2:4]) + 1
    ]
  )

# One row per country, one column per topic. Only country, name and rank are
# kept before spreading: any remaining score column would act as an extra key
# and scatter each country's ranks over many rows that are mostly NA.
cleantable3 <- country3df %>%
  select(country, name, rank) %>%
  spread(name, rank) %>%
  arrange(country)

cleantable3

write.csv(cleantable3, file = "cleantable3.csv")